1 Data check

1.1 Load packages

library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("tidyr")
library("ggplot2")
library("stringr")
library("tigerstats")
## Loading required package: abd
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: lattice
## Loading required package: grid
## Loading required package: mosaic
## Loading required package: ggformula
## Loading required package: ggstance
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
## 
## Have you tried the ggformula package for your plots?
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following object is masked from 'package:ggplot2':
## 
##     stat
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median,
##     prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
## Welcome to tigerstats!
## To learn more about this package, consult its website:
##  http://homerhanumat.github.io/tigerstats
library("ez")
## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car

1.2 Read data

# Read the landmark table; header = TRUE and sep = "," are read.csv() defaults.
df <- read.csv("../data/c-center_16th_Sep.csv")
# load() restores the objects stored in the .Rda file into the workspace.
# The merge step that follows expects it to provide `df_sr`.
load("../data/speech_rate.Rda")

1.3 Merge mview output with speech rate dataframe

# Right join: keep every row of the speech-rate table and attach the matching
# rows of `df` by subject and file name. `df_sr` is not needed afterwards.
df <- right_join(df, df_sr, by = c("subj", "fname"))
rm(df_sr)

1.4 Tidy the data

# Split the file name on "_" into seven fields, then build a trial id by
# pasting the trial number and the repetition together.
df <- df %>%
  separate(
    col = fname,
    into = c("project", "prompt", "block", "remove", "remove2", "trialno", "repetition"),
    sep = "_"
  ) %>%
  mutate(trial = str_c(trialno, repetition))

Drop unwanted columns.

# Remove the listed columns from `df`.
df <- select(
  df,
  -c(
    project, remove, remove2,
    x1, x2, x3, x4, x5, x6, x7,
    y1, y2, y3, y4, y5, y6, y7,
    z1, z2, z3, z4, z5, z6, z7,
    v1, v2, v3, v4, v5, v6, v7,
    X.1, X.2, X.3,
    DOns, DTarg, DOff
  )
)

Create new variable recording.no to indicate the recording number (first recording, second recording etc..).

# Number each subject's distinct trials 1..k in ascending `trial` order.
# NOTE(review): `trial` is a pasted string, so the order is lexicographic
# ("10" sorts before "2") — confirm the trial numbers are zero-padded.
trialno <- df %>%
  distinct(subj, trial) %>%
  group_by(subj) %>%
  # The argument is `.by_group`; the former `by_group = TRUE` was taken by
  # arrange() as an extra (constant) sort expression.
  arrange(trial, .by_group = TRUE) %>%
  mutate(recording.no = row_number()) %>%
  ungroup() %>%
  select(recording.no, everything()) %>%
  arrange(subj, recording.no)

Bind trialno with dataframe.

# Attach the recording number to every row, move it to the first column and
# sort by subject and recording; the lookup table is then discarded.
df <- inner_join(df, trialno, by = c("subj", "trial"))
df <- select(df, recording.no, everything())
df <- arrange(df, subj, recording.no)
rm(trialno)

Create group variable.

# Subjects whose id contains "CTRL" are labelled "CTRL", all others "PD".
df$group <- factor(ifelse(str_detect(df$subj, "CTRL"), "CTRL", "PD"))

Create condition based on prompt.

# Every row starts as "C"; the prompts listed below are then recoded to "CC"
# and "CCC".
df$condition <- "C"
df$condition[df$prompt %in% c("oma spat", "opa Smat", "opa schat", "oma's prak")] <- "CC"
df$condition[df$prompt %in% "oma sprak"] <- "CCC"
df$condition <- factor(df$condition)

Create cluster column.

# Map each pair of prompts to a cluster label; prompts not listed stay NA.
df$cluster[df$prompt %in% c("oma spat", "oma's pad")] <- "sp"
df$cluster[df$prompt %in% c("opa schat", "opa's gat")] <- "sx"
df$cluster[df$prompt %in% c("opa Smat", "opa's mat")] <- "sm"
df$cluster[df$prompt %in% c("oma sprak", "oma's prak")] <- "spr"
df$cluster <- factor(df$cluster)
levels(df$cluster)
## [1] "sm"  "sp"  "spr" "sx"

Remove faulty segmentation, based on outlier column.

# Keep only the rows whose outlier flag is NA.
# NOTE(review): `$` partial-matches data-frame column names, so `outl` may be
# resolving to a longer name (e.g. an "outlier" column) — confirm before
# rewriting this with a tidy-eval verb.
df <- df[is.na(df$outl),]

Check whether the conditions are well coded and whether the segmentation involves the correct articulators.

# Cross-tabulate prompt against condition to check the coding.
table(df$prompt, df$condition)
##             
##                 C   CC  CCC
##   oma spat      0 1814    0
##   oma sprak     0    0 1677
##   oma's pad  1797    0    0
##   oma's prak    0 1704    0
##   opa schat     0 1695    0
##   opa Smat      0 1789    0
##   opa's gat  1697    0    0
##   opa's mat  1762    0    0
# Cross-tabulate prompt against cluster to check the coding.
table(df$prompt, df$cluster)
##             
##                sm   sp  spr   sx
##   oma spat      0 1814    0    0
##   oma sprak     0    0 1677    0
##   oma's pad     0 1797    0    0
##   oma's prak    0    0 1704    0
##   opa schat     0    0    0 1695
##   opa Smat   1789    0    0    0
##   opa's gat     0    0    0 1697
##   opa's mat  1762    0    0    0
# Cross-tabulate segment against trajectory to see which trajectory each
# segment was measured on.
table(df$seg, df$traj)
##    
##            LA TB_X TB_XZ TB_Z TT_X TT_XZ TT_Z
##   É‘   93    0   59  2154  332    0     0    0
##   k   44    0    0   159  683    0     0    0
##   m   23  869    0     0    0    0     0    0
##   p   52 1749    0     0    0    0     0    0
##   r   79    0   18    54   31    0   285  262
##   s  109    0    0     0    0    0  3357   60
##   t   67    0    0     0    0   58  2349  207
##   X   37    0  142   298  305    0     0    0

Remove unreliable /sx/ segmentation of PD25.

# Drop PD25's /sx/ rows. %in% never yields NA, so rows with a missing subject
# or cluster are kept unchanged; with `==` an NA in the logical row index
# would have produced all-NA rows.
df <- df[!(df$subj %in% "PD25" & df$cluster %in% "sx"), ]

2 Preparing data for within cluster analysis

/sp/ CC lag

# t3 and t5 of /s/ in "oma spat", renamed so they can sit next to the /p/
# columns after the join.
# rowwise() and group_by(subj) were removed: the group_by(recording.no) that
# followed replaced both, so neither affected the result. The table is still
# grouped by recording.no, as before.
offset.s <- df %>%
  filter(prompt == "oma spat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t3.s = t3, t5.s = t5)

# t3 and t5 of /p/, joined to /s/ per subject and recording, plus:
#   time.lag      = t3.p - t5.s
#   time.lag.norm = time.lag / (t5.p - t3.s)
#   dur.C1.norm   = (t5.s - t3.s) / (t5.p - t3.s)
#   dur.C2.norm   = (t5.p - t3.p) / (t5.p - t3.s)
sp.CC <- df %>%
  filter(prompt == "oma spat", seg == "p") %>%
  group_by(recording.no) %>%
  select(
    subj, recording.no, prompt, block, condition, group, cluster,
    duration.tt, duration.mt, t3.p = t3, t5.p = t5
  ) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  mutate(
    time.lag = t3.p - t5.s,
    time.lag.norm = time.lag / (t5.p - t3.s),
    dur.C1.norm = (t5.s - t3.s) / (t5.p - t3.s),
    dur.C2.norm = (t5.p - t3.p) / (t5.p - t3.s)
  )


# Normalized lag against normalized C2 duration: scatter plot and correlation
# over the complete pairs.
plot(sp.CC$time.lag.norm, sp.CC$dur.C2.norm)

cor(sp.CC$time.lag.norm, sp.CC$dur.C2.norm, use = "complete.obs")
## [1] -0.4257756
# Normalized lag against normalized C1 duration, "CTRL" rows only.
plot(sp.CC$time.lag.norm[sp.CC$group == "CTRL"], sp.CC$dur.C1.norm[sp.CC$group == "CTRL"])

# Same plot for the "PD" rows.
plot(sp.CC$time.lag.norm[sp.CC$group == "PD"], sp.CC$dur.C1.norm[sp.CC$group == "PD"])

# The /s/ lookup table is no longer needed.
rm(offset.s)

# Drop the rows whose normalized lag is missing.
# base::na.omit() has no `cols` argument (it was silently ignored), so the old
# call removed every row with an NA in ANY column.
sp.CC <- sp.CC %>%
  filter(!is.na(time.lag.norm))

# Normalized lag per subject, filled by group. `na.rm` is a layer argument,
# not an aesthetic, so it now sits in geom_boxplot().
# NOTE(review): the title says "CTRL" although both groups are drawn — confirm.
ggplot(sp.CC, aes(x = subj, y = time.lag.norm, fill = group)) +
  geom_boxplot(na.rm = TRUE) +
  labs(title = "CTRL: /sp/", y = "Lag between C's")

/sm/ CC lag

# t3 and t5 of /s/ in "opa Smat".
# rowwise() and group_by(subj) were removed: the group_by(recording.no) that
# followed replaced both, so neither affected the result. The table is still
# grouped by recording.no, as before.
offset.s <- df %>%
  filter(prompt == "opa Smat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t3.s = t3, t5.s = t5)

# t3 and t5 of /m/, joined to /s/ per subject and recording, plus:
#   time.lag      = t3.m - t5.s
#   time.lag.norm = time.lag / (t5.m - t3.s)
#   dur.C1.norm   = (t5.s - t3.s) / (t5.m - t3.s)
#   dur.C2.norm   = (t5.m - t3.m) / (t5.m - t3.s)
sm.CC <- df %>%
  filter(prompt == "opa Smat", seg == "m") %>%
  group_by(recording.no) %>%
  select(
    subj, recording.no, prompt, block, condition, group, cluster,
    duration.tt, duration.mt, t3.m = t3, t5.m = t5
  ) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  mutate(
    time.lag = t3.m - t5.s,
    time.lag.norm = time.lag / (t5.m - t3.s),
    dur.C1.norm = (t5.s - t3.s) / (t5.m - t3.s),
    dur.C2.norm = (t5.m - t3.m) / (t5.m - t3.s)
  )


# Normalized lag against normalized C2 duration: scatter plot and correlation
# over the complete pairs.
plot(sm.CC$time.lag.norm, sm.CC$dur.C2.norm)

cor(sm.CC$time.lag.norm, sm.CC$dur.C2.norm, use = "complete.obs")
## [1] -0.1151117
# Normalized lag against normalized C1 duration, per group.
# `use` is an argument of cor(), not of plot(); passing it to plot() only
# produced '"use" is not a graphical parameter' warnings, so it was removed.
plot(sm.CC$time.lag.norm[sm.CC$group == "CTRL"], sm.CC$dur.C1.norm[sm.CC$group == "CTRL"])

plot(sm.CC$time.lag.norm[sm.CC$group == "PD"], sm.CC$dur.C1.norm[sm.CC$group == "PD"])

# The /s/ lookup table is no longer needed.
rm(offset.s)

# Drop the rows whose normalized lag is missing.
# base::na.omit() has no `cols` argument (it was silently ignored), so the old
# call removed every row with an NA in ANY column.
sm.CC <- sm.CC %>%
  filter(!is.na(time.lag.norm))

# Normalized lag per subject, filled by group. `na.rm` is a layer argument,
# not an aesthetic, so it now sits in geom_boxplot().
# NOTE(review): the title says "PD" although both groups are drawn, and the
# axis label says "(ms)" although the plotted lag is a ratio — confirm.
ggplot(sm.CC, aes(x = subj, y = time.lag.norm, fill = group)) +
  geom_boxplot(na.rm = TRUE) +
  labs(title = "PD: /sm/", y = "Lag between C's (ms)")

/sx/ CC lag

# t3 and t5 of /s/ in "opa schat".
# rowwise() and group_by(subj) were removed: the group_by(recording.no) that
# followed replaced both, so neither affected the result. The table is still
# grouped by recording.no, as before.
offset.s <- df %>%
  filter(prompt == "opa schat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t3.s = t3, t5.s = t5)

# t3 and t5 of /x/ (coded "X"), joined to /s/ per subject and recording, plus:
#   time.lag      = t3.x - t5.s
#   time.lag.norm = time.lag / (t5.x - t3.s)
#   dur.C1.norm   = (t5.s - t3.s) / (t5.x - t3.s)
#   dur.C2.norm   = (t5.x - t3.x) / (t5.x - t3.s)
sx.CC <- df %>%
  filter(prompt == "opa schat", seg == "X") %>%
  group_by(recording.no) %>%
  select(
    subj, recording.no, prompt, block, condition, group, cluster,
    duration.tt, duration.mt, t3.x = t3, t5.x = t5
  ) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  mutate(
    time.lag = t3.x - t5.s,
    time.lag.norm = time.lag / (t5.x - t3.s),
    dur.C1.norm = (t5.s - t3.s) / (t5.x - t3.s),
    dur.C2.norm = (t5.x - t3.x) / (t5.x - t3.s)
  )


# Normalized lag against normalized C2 duration: scatter plot and correlation
# over the complete pairs.
plot(sx.CC$time.lag.norm, sx.CC$dur.C2.norm)

cor(sx.CC$time.lag.norm, sx.CC$dur.C2.norm, use = "complete.obs")
## [1] -0.9249422
# Normalized lag against normalized C1 duration, "CTRL" rows only.
plot(sx.CC$time.lag.norm[sx.CC$group == "CTRL"], sx.CC$dur.C1.norm[sx.CC$group == "CTRL"])

# Same plot for the "PD" rows.
plot(sx.CC$time.lag.norm[sx.CC$group == "PD"], sx.CC$dur.C1.norm[sx.CC$group == "PD"])

# Drop the rows whose normalized lag is missing.
# base::na.omit() has no `cols` argument (it was silently ignored), so the old
# call removed every row with an NA in ANY column.
sx.CC <- sx.CC %>%
  filter(!is.na(time.lag.norm))


# Normalized lag per subject, filled by group. `na.rm` is a layer argument,
# not an aesthetic, so it now sits in geom_boxplot().
# NOTE(review): the title says "PD" although both groups are drawn, and the
# axis label says "(ms)" although the plotted lag is a ratio — confirm.
ggplot(sx.CC, aes(x = subj, y = time.lag.norm, fill = group)) +
  geom_boxplot(na.rm = TRUE) +
  labs(title = "PD: /sx/", y = "Lag between C's (ms)")

/s-r/ CC lag

# t3 and t5 of /s/ in "oma's prak".
# rowwise() and group_by(subj) were removed throughout this chunk: the
# group_by(recording.no) that followed replaced both, so neither affected the
# result. All three tables are still grouped by recording.no, as before.
offset.s <- df %>%
  filter(prompt == "oma's prak", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t3.s = t3, t5.s = t5)

# t3 and t5 of /p/
offset.p <- df %>%
  filter(prompt == "oma's prak", seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, t3.p = t3, t5.p = t5)

# t3 and t5 of /r/, joined to /s/ (inner join) and to /p/ (left join, so a
# recording without /p/ keeps its row with NA in the /p/ columns), plus:
#   time.lag      = t3.r - t5.s
#   time.lag.norm = time.lag / (t5.r - t3.s)
#   dur.C1.norm, dur.C2.norm, dur.C3.norm = (t5 - t3) of /s/, /p/ and /r/,
#   each divided by (t5.r - t3.s)
spr.CC <- df %>%
  filter(prompt == "oma's prak", seg == "r") %>%
  group_by(recording.no) %>%
  select(
    subj, recording.no, prompt, block, condition, group, cluster,
    duration.tt, duration.mt, t3.r = t3, t5.r = t5
  ) %>%
  inner_join(offset.s, by = c("subj", "recording.no")) %>%
  left_join(offset.p, by = c("subj", "recording.no")) %>%
  mutate(
    time.lag = t3.r - t5.s,
    time.lag.norm = time.lag / (t5.r - t3.s),
    dur.C1.norm = (t5.s - t3.s) / (t5.r - t3.s),
    dur.C2.norm = (t5.p - t3.p) / (t5.r - t3.s),
    dur.C3.norm = (t5.r - t3.r) / (t5.r - t3.s)
  )


# Normalized lag against normalized C2 and C3 duration: scatter plots and
# correlations over the complete pairs.
plot(spr.CC$time.lag.norm, spr.CC$dur.C2.norm)

cor(spr.CC$time.lag.norm, spr.CC$dur.C2.norm, use = "complete.obs")  
## [1] 0.1506096
plot(spr.CC$time.lag.norm, spr.CC$dur.C3.norm)

cor(spr.CC$time.lag.norm, spr.CC$dur.C3.norm, use = "complete.obs")
## [1] -0.48932
# Per-group correlation of normalized lag with normalized C1 duration.
# Without `use = "complete.obs"` both calls returned NA because of missing
# values; the argument is now passed as in the calls above. The echoed NA
# results were removed; re-knit to obtain the values.
cor(spr.CC$time.lag.norm[spr.CC$group == "CTRL"], spr.CC$dur.C1.norm[spr.CC$group == "CTRL"], use = "complete.obs")
cor(spr.CC$time.lag.norm[spr.CC$group == "PD"], spr.CC$dur.C1.norm[spr.CC$group == "PD"], use = "complete.obs")
plot(spr.CC$time.lag.norm[spr.CC$group == "CTRL"], spr.CC$dur.C1.norm[spr.CC$group == "CTRL"])

plot(spr.CC$time.lag.norm[spr.CC$group == "PD"], spr.CC$dur.C1.norm[spr.CC$group == "PD"])

rm(offset.s)

# Drop the rows whose normalized lag is missing.
# base::na.omit() has no `cols` argument (it was silently ignored), so the old
# call removed every row with an NA in ANY column, including rows that only
# lacked the left-joined /p/ landmarks.
spr.CC <- spr.CC %>%
  filter(!is.na(time.lag.norm))

# Normalized lag per subject, filled by group. `na.rm` is a layer argument,
# not an aesthetic, so it now sits in geom_boxplot().
# NOTE(review): the title says "PD" although both groups are drawn, and the
# axis label says "(ms)" although the plotted lag is a ratio — confirm.
ggplot(spr.CC, aes(x = subj, y = time.lag.norm, fill = group)) +
  geom_boxplot(na.rm = TRUE) +
  labs(title = "PD: /spr/", y = "Lag between /s/ and /r/ (ms)")

Merge.

# Stack the four per-cluster tables. They do not share all columns (each has
# its own t3.*/t5.* names and only spr.CC has dur.C3.norm). bind_rows()
# matches columns by name and fills the gaps with NA, whereas base rbind()
# requires identical column names.
df.CC.lag <- bind_rows(sm.CC, sp.CC, sx.CC, spr.CC)
rm(sm.CC, sp.CC, sx.CC, spr.CC)

2.1 Some plots

# Shared dodge position for the box plots drawn on top of the violins.
dodge <- position_dodge(width = 0.9)

# Raw lag (time.lag), all clusters together.
ggplot(df.CC.lag, aes(x = condition, y = time.lag, fill = group)) +
  geom_violin() +
  labs(title = "CC lag all clusters", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

# Normalized lag (time.lag.norm), one plot per cluster.
ggplot(
  df.CC.lag[df.CC.lag$cluster == "sp",],
  aes(x = condition, y = time.lag.norm, fill = group)
) +
  geom_violin() +
  labs(title = "CC lag /sp/", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

ggplot(
  df.CC.lag[df.CC.lag$cluster == "sm",],
  aes(x = condition, y = time.lag.norm, fill = group)
) +
  geom_violin() +
  labs(title = "CC lag /sm/", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

ggplot(
  df.CC.lag[df.CC.lag$cluster == "sx",],
  aes(x = condition, y = time.lag.norm, fill = group)
) +
  geom_violin() +
  labs(title = "CC lag /sx/", y = "Lag between C's") +
  geom_boxplot(width = .2, position = dodge)

ggplot(
  df.CC.lag[df.CC.lag$cluster == "spr",],
  aes(x = condition, y = time.lag.norm, fill = group)
) +
  geom_violin() +
  labs(title = "CC lag /spr/", y = "Lag between /s/ and /r/") +
  geom_boxplot(width = .2, position = dodge)

3 Examining duration of C’s in onset.

# dur:      t5 - t3 for every row.
# norm.dur: dur centred and scaled within each subject x cluster x segment.
# scale() returns a one-column matrix; as.numeric() stores a plain numeric
# vector instead of a matrix column. rowwise() was unnecessary for the
# vectorised subtraction and was removed. `df` stays grouped by
# subj, cluster, seg, as before.
df <- df %>%
  mutate(dur = t5 - t3) %>%
  group_by(subj, cluster, seg) %>%
  mutate(norm.dur = as.numeric(scale(dur)))


# Mean duration of /p/, /m/ and /X/ in condition "C", per subject and segment.
# The previous pipeline ended on group_by(seg), which discarded the subject
# grouping: every subject received the same across-subject mean, while the
# "CC" means below were per subject. Both are now per subject and segment.
mean.dur.C <- df %>%
  ungroup() %>%
  filter(condition == "C", seg %in% c("p", "m", "X")) %>%
  group_by(seg, subj) %>%
  summarise(mean.dur.C = mean(dur, na.rm = TRUE), .groups = "drop")

# Same means for condition "CC", joined to the "C" means; diff = C - CC.
# The result stays grouped by seg and subj, as before.
# NOTE(review): seg "p" also occurs in the "CC" prompt "oma's prak", so the
# /p/ mean pools "oma spat" and "oma's prak" — confirm this is intended.
# NOTE(review): the object name `diff` shadows base::diff() in this session.
diff <- df %>%
  ungroup() %>%
  filter(condition == "CC", seg %in% c("p", "m", "X")) %>%
  group_by(seg, subj) %>%
  summarise(mean.dur.CC = mean(dur, na.rm = TRUE), .groups = "keep") %>%
  inner_join(mean.dur.C, by = c("subj", "seg")) %>%
  mutate(diff = mean.dur.C - mean.dur.CC)

# Mean and standard deviation of the per-subject differences, per segment.
diff %>%
  group_by(seg) %>%
  summarize(mean = mean(diff, na.rm = TRUE), sd = sd(diff, na.rm = TRUE))
## `summarise()` ungrouping output (override with `.groups` argument)

Plot durations of right C per group:

# library() stops with an error when gridExtra is missing; require() would
# only return FALSE and let the script fail later at grid.arrange().
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Box plots of norm.dur by group and condition, one panel per cluster,
# arranged in a 2 x 2 grid.
# NOTE(review): y is norm.dur (the scaled duration), yet the axis label reads
# "Duration (ms)" — confirm the intended label.
plot1 <- ggplot(
  df[df$cluster == 'sp' & df$seg == "p",],
  aes(x = group, y = norm.dur, fill = condition)
) +
  labs(title = "Duration /p/", y = "Duration (ms)", x = "Group") +
  geom_boxplot(width = .2, position = dodge)

plot2 <- ggplot(
  df[df$cluster == 'sm' & df$seg == "m",],
  aes(x = group, y = norm.dur, fill = condition)
) +
  labs(title = "Duration /m/", y = "Duration (ms)", x = "Group") +
  geom_boxplot(width = .2, position = dodge)

plot3 <- ggplot(
  df[df$cluster == 'sx' & df$seg == "X",],
  aes(x = group, y = norm.dur, fill = condition)
) +
  labs(title = "Duration /x/", y = "Duration (ms)", x = "Group") +
  geom_boxplot(width = .2, position = dodge)

plot4 <- ggplot(
  df[df$cluster == 'spr' & df$seg == "r",],
  aes(x = group, y = norm.dur, fill = condition)
) +
  labs(title = "Duration /r/", y = "Duration (ms)", x = "Group") +
  geom_boxplot(width = .2, position = dodge)

grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
## Warning: Removed 14 rows containing non-finite values (stat_boxplot).
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).
## Warning: Removed 49 rows containing non-finite values (stat_boxplot).
## Warning: Removed 90 rows containing non-finite values (stat_boxplot).

rm(plot1, plot2, plot3, plot4)

Plot durations of right C per subject:

# Violin + box plots of dur per subject, filled by condition, for /m/ and /p/
# in each group. ylim(0, 300) drops values outside that range, which is what
# the "Removed ... rows" warnings report.
ggplot(
  df[df$cluster == 'sm' & df$seg == "m" & df$group == "CTRL",],
  aes(x = subj, y = dur, fill = condition)
) +
  geom_violin() +
  labs(title = "duration /m/ CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 8 rows containing non-finite values (stat_ydensity).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$cluster == 'sm' & df$seg == "m" & df$group == "PD",],
  aes(x = subj, y = dur, fill = condition)
) +
  geom_violin() +
  labs(title = "duration /m/ PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 20 rows containing non-finite values (stat_ydensity).
## Warning: Removed 20 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$cluster == 'sp' & df$seg == "p" & df$group == "CTRL",],
  aes(x = subj, y = dur, fill = condition)
) +
  geom_violin() +
  labs(title = "duration /p/ CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 12 rows containing non-finite values (stat_ydensity).
## Warning: Removed 12 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$cluster == 'sp' & df$seg == "p" & df$group == "PD",],
  aes(x = subj, y = dur, fill = condition)
) +
  geom_violin() +
  labs(title = "duration /p/ PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 4 rows containing non-finite values (stat_ydensity).
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

Plot duration of first C per group

# Violin + box plots of the duration of /s/ per group, filled by condition,
# for the "sm" and "sp" clusters.
ggplot(
  df[df$cluster == 'sm' & df$seg == "s",],
  aes(x = group, y = dur, fill = condition)
) +
  geom_violin() +
  labs(title = "duration /s/ in /sm/ and /s#m/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 29 rows containing non-finite values (stat_ydensity).
## Warning: Removed 29 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$cluster == 'sp' & df$seg == "s",],
  aes(x = group, y = dur, fill = condition)
) +
  geom_violin() +
  labs(title = "duration /s/ in /sp/ and /s#p/", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 15 rows containing non-finite values (stat_ydensity).
## Warning: Removed 15 rows containing non-finite values (stat_boxplot).

Compare duration of C’s in onset per group

# Duration per group, filled by segment, for the "CC" rows of the "sm" and
# "sp" clusters; the two segments named in the filter are excluded.
# NOTE(review): both titles end in "CTRL" although x = group shows both
# groups — confirm the intended titles.
ggplot(
  df[df$cluster == 'sm' & df$seg != "É‘" & df$seg != "t" & df$condition == "CC",],
  aes(x = group, y = dur, fill = seg)
) +
  geom_violin() +
  labs(title = "duration /s/ and /m/ in CC, CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 40 rows containing non-finite values (stat_ydensity).
## Warning: Removed 40 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$cluster == 'sp' & df$seg != "É‘" & df$seg != "t" & df$condition == "CC",],
  aes(x = group, y = dur, fill = seg)
) +
  geom_violin() +
  labs(title = "duration /s/ and /p/ in CC, CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 17 rows containing non-finite values (stat_ydensity).
## Warning: Removed 17 rows containing non-finite values (stat_boxplot).

Compare duration of C’s in onset per subj

# Duration per subject, filled by segment, for the "CC" rows of the "sm" and
# "sp" clusters, drawn separately for each group.
ggplot(
  df[df$group == 'CTRL' & df$cluster == 'sm' & df$seg != "É‘" & df$seg != "t" & df$condition == "CC",],
  aes(x = subj, y = dur, fill = seg)
) +
  geom_violin() +
  labs(title = "duration /s/ and/m/ in CC, CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 8 rows containing non-finite values (stat_ydensity).
## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$group == 'PD' & df$cluster == 'sm' & df$seg != "É‘" & df$seg != "t" & df$condition == "CC",],
  aes(x = subj, y = dur, fill = seg)
) +
  geom_violin() +
  labs(title = "duration /s/ and/m/ in CC, PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 32 rows containing non-finite values (stat_ydensity).
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$group == 'CTRL' & df$cluster == 'sp' & df$seg != "É‘" & df$seg != "t" & df$condition == "CC",],
  aes(x = subj, y = dur, fill = seg)
) +
  geom_violin() +
  labs(title = "duration /s/ and/p/ in CC, CTRL", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 13 rows containing non-finite values (stat_ydensity).
## Warning: Removed 13 rows containing non-finite values (stat_boxplot).

ggplot(
  df[df$group == 'PD' & df$cluster == 'sp' & df$seg != "É‘" & df$seg != "t" & df$condition == "CC",],
  aes(x = subj, y = dur, fill = seg)
) +
  geom_violin() +
  labs(title = "duration /s/ and /p/ in CC, PD", y = "duration") +
  geom_boxplot(width = .2, position = dodge) +
  ylim(0, 300)
## Warning: Removed 4 rows containing non-finite values (stat_ydensity).
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

## Density plots duration

# Density of dur by condition for /p/, /m/ and /x/ (coded "X"), separately per
# group. xlim(0, 200) drops values outside that range, which is what the
# "Removed ... rows" warnings report.
ggplot(
  df[df$cluster == 'sp' & df$seg == "p" & df$group == "CTRL",],
  aes(x = dur, fill = condition)
) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution duration /p/, CTRL") +
  xlim(0, 200)
## Warning: Removed 14 rows containing non-finite values (stat_density).

ggplot(
  df[df$cluster == 'sp' & df$seg == "p" & df$group == "PD",],
  aes(x = dur, fill = condition)
) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution duration /p/, PD") +
  xlim(0, 200)
## Warning: Removed 5 rows containing non-finite values (stat_density).

ggplot(
  df[df$cluster == 'sm' & df$seg == "m" & df$group == "CTRL",],
  aes(x = dur, fill = condition)
) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution duration /m/, CTRL") +
  xlim(0, 200)
## Warning: Removed 10 rows containing non-finite values (stat_density).

ggplot(
  df[df$cluster == 'sm' & df$seg == "m" & df$group == "PD",],
  aes(x = dur, fill = condition)
) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution duration /m/, PD") +
  xlim(0, 200)
## Warning: Removed 21 rows containing non-finite values (stat_density).

ggplot(
  df[df$cluster == 'sx' & df$seg == "X" & df$group == "CTRL",],
  aes(x = dur, fill = condition)
) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution duration /x/, CTRL") +
  xlim(0, 200)
## Warning: Removed 41 rows containing non-finite values (stat_density).

ggplot(
  df[df$cluster == 'sx' & df$seg == "X" & df$group == "PD",],
  aes(x = dur, fill = condition)
) +
  geom_density(alpha = 0.4) +
  ggtitle("Distribution duration /x/, PD") +
  xlim(0, 200)
## Warning: Removed 14 rows containing non-finite values (stat_density).

4 Examining landmarks

4.1 Derive temporal midpoint of nucleus

# Midpoint between t3 and t5 for every row.
# Computed as (t3 + t5) / 2 so that a row with only one of the two landmarks
# gets NA. mean(c(t3, t5), na.rm = TRUE) silently returned the single
# available landmark as the "midpoint" (and NaN when both were missing).
# rowwise() is applied afterwards only to leave `df` in the same row-wise
# state as before.
df <- df %>%
  mutate(mid.nuc = (t3 + t5) / 2) %>%
  rowwise()

4.2 Examine the relation of gestural onset/offset to the nucleus midpoint

# GONS = mid.nuc - t1 and GOFF = t7 - mid.nuc, stacked into long format:
# `side` names the measure (as a factor), `time.to.mid` holds the value, and
# rows with a missing value are dropped.
df.density <- df %>%
  rowwise() %>%
  mutate(
    GONS = mid.nuc - t1,
    GOFF = t7 - mid.nuc
  ) %>%
  gather(key = side, value = time.to.mid, GONS:GOFF, factor_key = TRUE, na.rm = TRUE)

  # Density of time.to.mid by side, for /s/, /m/, /p/ and /x/ (coded "X") in
  # conditions "C" and "CC".
  ggplot(
    df.density[df.density$condition == 'C' & df.density$seg == "s",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /s/ in C")

  ggplot(
    df.density[df.density$condition == 'CC' & df.density$seg == "s",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /s/ in CC")

  ggplot(
    df.density[df.density$condition == 'C' & df.density$seg == "m",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /m/ in C")

  ggplot(
    df.density[df.density$condition == 'CC' & df.density$seg == "m",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /m/ in CC")

  ggplot(
    df.density[df.density$condition == 'C' & df.density$seg == "p",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /p/ in C")

  ggplot(
    df.density[df.density$condition == 'CC' & df.density$seg == "p",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /p/ in CC")

  ggplot(
    df.density[df.density$condition == 'C' & df.density$seg == "X",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /x/ in C")

  ggplot(
    df.density[df.density$condition == 'CC' & df.density$seg == "X",],
    aes(x = time.to.mid, fill = side)
  ) +
    geom_density(alpha = 0.4) +
    ggtitle("Distribution time to midpoint /x/ in CC")

5 Preparing data for C-center analysis based on nucleus on and offset

5.1 Calculate lags for /sp/

# mid.nuc of /p/ in both /sp/ prompts.
# group_by(subj) was removed throughout this section: the
# group_by(recording.no) that followed replaced it, so it never had any
# effect. Every table is still grouped by recording.no, as before.
mid.nuc.p <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.p = mid.nuc)
  
# mid.nuc of /s/ ("oma spat" only)
mid.nuc.s <- df %>%
  filter(prompt == "oma spat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.s = mid.nuc)

# C-center: the /p/ midpoint for "oma's pad", the mean of the /s/ and /p/
# midpoints for "oma spat". mid.nuc.p already holds only these two prompts,
# so the repeated prompt filter was removed.
sp.c.center <- mid.nuc.p %>%
  left_join(mid.nuc.s, by = c("subj", "prompt", "recording.no")) %>%
  rowwise() %>%
  mutate(c.center = case_when(
    prompt == "oma's pad" ~ mid.nuc.p,
    prompt == "oma spat" ~ mean(c(mid.nuc.s, mid.nuc.p))
  )) %>%
  select(subj, recording.no, c.center)

rm(mid.nuc.p, mid.nuc.s)

Get rightedge /sp/

# right edge: t5 of /p/
sp.rightedge <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, rightedge = t5)

Get leftedge /sp/

# left edge in the cluster prompt: t3 of /s/
sp.leftedge.CC <- df %>%
  filter(prompt == "oma spat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, leftedge = t3)

Get leftedge /p/

# left edge in the singleton prompt: t3 of /p/ (the old comment said "offset")
sp.leftedge.C <- df %>%
  filter(prompt == "oma's pad", seg == "p") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, leftedge = t3)

Merge

# bind_rows() matches columns by name and keeps the recording.no grouping.
sp.leftedge <- bind_rows(sp.leftedge.CC, sp.leftedge.C)

Get anchor /t/

# anchor: t3 of /t/
sp.anchor <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad"), seg == "t") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete df’s

# Attach C-center, right edge, left edge and anchor to every row of the two
# prompts, matched on subject and recording.
df.C.center.sp <- df %>%
  filter(prompt %in% c("oma spat", "oma's pad")) %>%
  left_join(sp.c.center, by = c("subj", "recording.no")) %>%
  left_join(sp.rightedge, by = c("subj", "recording.no")) %>%
  left_join(sp.leftedge, by = c("subj", "recording.no")) %>%
  left_join(sp.anchor, by = c("subj", "recording.no"))

rm(sp.c.center, sp.rightedge, sp.anchor)

5.2 Calculate lags for /sm/

# mid.nuc of /m/ in both /sm/ prompts.
# group_by(subj) was removed throughout this section: the
# group_by(recording.no) that followed replaced it, so it never had any
# effect. Every table is still grouped by recording.no, as before.
mid.nuc.m <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "m") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.m = mid.nuc)

# mid.nuc of /s/ ("opa Smat" only)
mid.nuc.s <- df %>%
  filter(prompt == "opa Smat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, prompt, recording.no, mid.nuc.s = mid.nuc)

# C-center: the /m/ midpoint for "opa's mat", the mean of the /s/ and /m/
# midpoints for "opa Smat". mid.nuc.m already holds only these two prompts,
# so the repeated prompt filter was removed.
sm.c.center <- mid.nuc.m %>%
  left_join(mid.nuc.s, by = c("subj", "prompt", "recording.no")) %>%
  rowwise() %>%
  mutate(c.center = case_when(
    prompt == "opa's mat" ~ mid.nuc.m,
    prompt == "opa Smat" ~ mean(c(mid.nuc.s, mid.nuc.m))
  )) %>%
  select(subj, recording.no, c.center)

rm(mid.nuc.m, mid.nuc.s)

Get rightedge /sm/

# right edge: t5 of /m/
sm.rightedge <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "m") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, rightedge = t5)

Get leftedge /sm/

# left edge in the cluster prompt: t3 of /s/
sm.leftedge.CC <- df %>%
  filter(prompt == "opa Smat", seg == "s") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, leftedge = t3)

Get leftedge /m/

# left edge in the singleton prompt: t3 of /m/
sm.leftedge.C <- df %>%
  filter(prompt == "opa's mat", seg == "m") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, leftedge = t3)

Merge

# bind_rows() matches columns by name and keeps the recording.no grouping.
sm.leftedge <- bind_rows(sm.leftedge.CC, sm.leftedge.C)

Get anchor /t/

# anchor: t3 of /t/
sm.anchor <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat"), seg == "t") %>%
  group_by(recording.no) %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete df’s

# Attach C-center, right edge, left edge and anchor to every row of the two
# prompts, matched on subject and recording.
df.C.center.sm <- df %>%
  filter(prompt %in% c("opa Smat", "opa's mat")) %>%
  left_join(sm.c.center, by = c("subj", "recording.no")) %>%
  left_join(sm.rightedge, by = c("subj", "recording.no")) %>%
  left_join(sm.leftedge, by = c("subj", "recording.no")) %>%
  left_join(sm.anchor, by = c("subj", "recording.no"))

rm(sm.c.center, sm.rightedge, sm.anchor)

5.3 Calculate lags for /sx/

# collect mid.nuc time onset /x/ 
mid.nuc.x <- df %>%
  filter(prompt == "opa schat" | prompt == "opa's gat" ) %>%
  group_by(subj) %>%
  group_by(recording.no) %>%
  filter(seg == "X") %>%
  rename(mid.nuc.x = mid.nuc) %>%
  select(subj, prompt, recording.no, mid.nuc.x)
  
# collect mid.nuc time onset /s/
mid.nuc.s <- df %>%
  filter(prompt == "opa schat") %>%
  group_by(subj) %>%
  group_by(recording.no) %>%
  filter(seg == "s") %>%
  rename(mid.nuc.s = mid.nuc) %>%
  select(subj, prompt, recording.no,  mid.nuc.s) 

# calculate C-center
sx.c.center <- mid.nuc.x %>%
  filter(prompt == "opa schat" | prompt == "opa's gat" ) %>%
  left_join(mid.nuc.s, by=c("subj", "prompt", "recording.no")) %>%
  rowwise() %>%
  mutate(c.center = case_when(prompt == "opa's gat" ~ mid.nuc.x, prompt == "opa schat" ~ mean(c(mid.nuc.s, mid.nuc.x)))) %>%
  select(subj, recording.no, c.center) 

rm(mid.nuc.x, mid.nuc.s)

Get rightedge /sx/

# collect offset times /x/ 
sx.rightedge <- df %>%
  filter(prompt == "opa schat" | prompt == "opa's gat" ) %>%
  group_by(subj) %>%
  group_by(recording.no) %>%
  filter(seg == "X") %>%
  rename(rightedge = t5) %>%
  select(subj, recording.no, rightedge) 

Get leftedge /sx/

# collect onset times /s/ 
 sx.leftedge.CC <- df %>%
  filter(prompt == "opa schat") %>%
  group_by(subj) %>%
  group_by(recording.no) %>%
  filter(seg == "s") %>%
  rename(leftedge = t3) %>%
  select(subj, recording.no, leftedge) 

Get leftedge /x/

# Left edge for "opa's gat": t3 of /x/ (seg "X")
# (t3 is labelled as the onset time in this script).
sx.leftedge.C <- df %>%
  filter(prompt == "opa's gat") %>%
  # a later group_by() replaces an earlier one, so only recording.no counts
  group_by(recording.no) %>%
  filter(seg == "X") %>%
  select(subj, recording.no, leftedge = t3)

Merge

# Stack the left edges of "opa schat" and "opa's gat".
# bind_rows() is the dplyr-native way to combine (grouped) tibbles: columns are
# matched by name and the grouping metadata is rebuilt for the combined rows.
sx.leftedge <- bind_rows(sx.leftedge.CC, sx.leftedge.C)

Get anchor /t/

# Anchor: t3 of the coda /t/ (seg "t") in both /sx/ prompts
# (t3 is labelled as the onset time in this script).
sx.anchor <- df %>%
  filter(prompt %in% c("opa schat", "opa's gat")) %>%
  # a later group_by() replaces an earlier one, so only recording.no counts
  group_by(recording.no) %>%
  filter(seg == "t") %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete data frames

# Attach the /sx/ landmark tables to every /sx/ row of df. Each table is
# left-joined on speaker and recording, in the order listed.
df.C.center.sx <- Reduce(
  function(acc, landmark) {
    left_join(acc, landmark, by = c("subj", "recording.no"))
  },
  list(sx.c.center, sx.rightedge, sx.leftedge, sx.anchor),
  filter(df, prompt %in% c("opa schat", "opa's gat"))
)

# helper tables that are no longer needed (sx.leftedge stays in the workspace)
rm(list = c("sx.c.center", "sx.rightedge", "sx.anchor"))

5.4 Calculate lags for /spr/

# C-center of the onset consonants in the /spr/ items.
# The helpers start from an ungrouped `df`: the previous chain
# group_by(subj) %>% group_by(recording.no) only kept the second grouping,
# and none of the steps below depends on grouping.

# mid.nuc of /r/ (seg "r") in both prompts
mid.nuc.r <- df %>%
  ungroup() %>%
  filter(prompt %in% c("oma sprak", "oma's prak"), seg == "r") %>%
  select(subj, prompt, recording.no, mid.nuc.r = mid.nuc)

# mid.nuc of /p/ (seg "p") in both prompts: /p/ belongs to the onset of
# "oma sprak" and of "oma's prak"
mid.nuc.p <- df %>%
  ungroup() %>%
  filter(prompt %in% c("oma sprak", "oma's prak"), seg == "p") %>%
  select(subj, prompt, recording.no, mid.nuc.p = mid.nuc)

# mid.nuc of /s/ (seg "s"), only for "oma sprak"
mid.nuc.s <- df %>%
  ungroup() %>%
  filter(prompt == "oma sprak", seg == "s") %>%
  select(subj, prompt, recording.no, mid.nuc.s = mid.nuc)

# C-center = mean mid.nuc of the onset consonants:
#   "oma sprak"  -> /s/, /p/, /r/
#   "oma's prak" -> /p/, /r/
# NOTE(review): the earlier version used /r/ alone for "oma's prak", while the
# left edge of that prompt is taken from /p/ (spr.leftedge.CC), i.e. the onset
# is treated as /pr/ there. Confirm that the two-consonant mean is intended.
# mean() collapses its input, hence rowwise(); ungroup() afterwards so the
# row-wise state does not leak into later steps.
spr.c.center <- mid.nuc.r %>%
  left_join(mid.nuc.s, by = c("subj", "prompt", "recording.no")) %>%
  left_join(mid.nuc.p, by = c("subj", "prompt", "recording.no")) %>%
  rowwise() %>%
  mutate(c.center = case_when(
    prompt == "oma's prak" ~ mean(c(mid.nuc.p, mid.nuc.r)),
    prompt == "oma sprak" ~ mean(c(mid.nuc.s, mid.nuc.p, mid.nuc.r))
  )) %>%
  ungroup() %>%
  select(subj, recording.no, c.center)

rm(mid.nuc.r, mid.nuc.s, mid.nuc.p)

Get rightedge /spr/

# Right edge: t5 of /r/ (seg "r") in both /spr/ prompts
# (t5 is labelled as the offset time in this script).
spr.rightedge <- df %>%
  filter(prompt %in% c("oma sprak", "oma's prak")) %>%
  # a later group_by() replaces an earlier one, so only recording.no counts
  group_by(recording.no) %>%
  filter(seg == "r") %>%
  select(subj, recording.no, rightedge = t5)

Get leftedge /spr/

# Left edge for "oma sprak": t3 of /s/ (seg "s")
# (t3 is labelled as the onset time in this script).
spr.leftedge.CCC <- df %>%
  filter(prompt == "oma sprak") %>%
  # a later group_by() replaces an earlier one, so only recording.no counts
  group_by(recording.no) %>%
  filter(seg == "s") %>%
  select(subj, recording.no, leftedge = t3)

Get leftedge /pr/

# Left edge for "oma's prak": t3 of /p/ (seg "p")
# (t3 is labelled as the onset time in this script).
spr.leftedge.CC <- df %>%
  filter(prompt == "oma's prak") %>%
  # a later group_by() replaces an earlier one, so only recording.no counts
  group_by(recording.no) %>%
  filter(seg == "p") %>%
  select(subj, recording.no, leftedge = t3)

Merge

# Stack the left edges of "oma sprak" and "oma's prak".
# bind_rows() is the dplyr-native way to combine (grouped) tibbles: columns are
# matched by name and the grouping metadata is rebuilt for the combined rows.
spr.leftedge <- bind_rows(spr.leftedge.CCC, spr.leftedge.CC)

Get anchor /k/

# Anchor: t3 of the coda /k/ (seg "k") in both /spr/ prompts
# (t3 is labelled as the onset time in this script).
spr.anchor <- df %>%
  filter(prompt %in% c("oma sprak", "oma's prak")) %>%
  # a later group_by() replaces an earlier one, so only recording.no counts
  group_by(recording.no) %>%
  filter(seg == "k") %>%
  select(subj, recording.no, anchor = t3)

Merge and remove obsolete data frames

# Attach the /spr/ landmark tables to every /spr/ row of df. Each table is
# left-joined on speaker and recording, in the order listed.
df.C.center.spr <- Reduce(
  function(acc, landmark) {
    left_join(acc, landmark, by = c("subj", "recording.no"))
  },
  list(spr.c.center, spr.rightedge, spr.leftedge, spr.anchor),
  filter(df, prompt %in% c("oma sprak", "oma's prak"))
)

# helper tables that are no longer needed (spr.leftedge stays in the workspace)
rm(list = c("spr.c.center", "spr.rightedge", "spr.anchor"))

5.5 Merge C-center data frames

# Stack the four per-cluster tables, then drop the per-cluster copies.
df.C.center <- rbind(df.C.center.sp, df.C.center.sm, df.C.center.sx, df.C.center.spr)
rm(df.C.center.sp, df.C.center.sm, df.C.center.sx, df.C.center.spr)

# Lag = anchor minus landmark. This is plain column arithmetic, so rowwise()
# is not needed; leaving it out also keeps df.C.center from being handed on
# as a row-wise data frame.
df.C.center <- df.C.center %>%
  mutate(
    c.center.lag = anchor - c.center,
    rightedge.lag = anchor - rightedge,
    leftedge.lag = anchor - leftedge
  )

5.6 Distribution

# Sum of c.center.lag per speaker
xtabs(formula = c.center.lag ~ subj, data = df.C.center)
## subj
##   CTRL01   CTRL02   CTRL03   CTRL04   CTRL05   CTRL06   CTRL07   CTRL08 
## 68346.38 67985.00 66015.00 39041.67 61045.21 77442.77 59773.33 64839.58 
##   CTRL09   CTRL10   CTRL11   CTRL12   CTRL13   CTRL14   CTRL15   CTRL16 
## 52433.75 64502.50 87568.75 64269.17 47137.50 66209.38 61985.83 40075.00 
##   CTRL17   CTRL18   CTRL19   CTRL20   CTRL21   CTRL23   CTRL24   CTRL25 
## 34172.50 56182.92 24673.33 44129.38 67449.17 54278.33 43338.33 55485.00 
##     PD01     PD02     PD03     PD04     PD05     PD06     PD07     PD08 
## 45974.17 55416.25 46407.50 38454.17 59822.90 57576.25 52457.50 51268.75 
##     PD09     PD10     PD11     PD12     PD13     PD14     PD15     PD16 
## 61385.00 59955.00 53438.70 50727.50 66320.67 57586.25 59913.33 49079.17 
##     PD17     PD18     PD19     PD21     PD22     PD23     PD25          
## 60586.04 60655.42  9437.50 51785.83 49108.12 29948.33 37824.17     0.00

5.7 Some plots

5.7.1 Per group

/sm/

# /sm/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "sm", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sm/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 198 rows containing non-finite values (stat_ydensity).
## Warning: Removed 198 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sm/, rightedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 158 rows containing non-finite values (stat_ydensity).
## Warning: Removed 158 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sm/, leftedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 563 rows containing non-finite values (stat_ydensity).
## Warning: Removed 563 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "sp", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sp/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 160 rows containing non-finite values (stat_ydensity).
## Warning: Removed 160 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sp/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 104 rows containing non-finite values (stat_ydensity).
## Warning: Removed 104 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sp/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 516 rows containing non-finite values (stat_ydensity).
## Warning: Removed 516 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "sx", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sx/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 558 rows containing non-finite values (stat_ydensity).
## Warning: Removed 558 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sx/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 531 rows containing non-finite values (stat_ydensity).
## Warning: Removed 531 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sx/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 604 rows containing non-finite values (stat_ydensity).
## Warning: Removed 604 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "spr", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/spr/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1156 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1156 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/spr/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1426 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1426 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/spr/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1346 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1346 rows containing non-finite values (stat_boxplot).

5.7.2 Per individual

/sm/

# /sm/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 63 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 50 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 135 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 108 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 88 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 72 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 45 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 285 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 273 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 253 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 549 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 705 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 607 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 721 rows containing non-finite values (stat_boxplot).

5.8 Some plots

5.8.1 Per group

/sm/

# /sm/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "sm", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sm/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 198 rows containing non-finite values (stat_ydensity).
## Warning: Removed 198 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sm/, rightedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 158 rows containing non-finite values (stat_ydensity).
## Warning: Removed 158 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sm/, leftedge", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 563 rows containing non-finite values (stat_ydensity).
## Warning: Removed 563 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "sp", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sp/, c-center", y = "Lag") +
  ylim(80, 350)
## Warning: Removed 160 rows containing non-finite values (stat_ydensity).
## Warning: Removed 160 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sp/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 104 rows containing non-finite values (stat_ydensity).
## Warning: Removed 104 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sp/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 516 rows containing non-finite values (stat_ydensity).
## Warning: Removed 516 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "sx", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sx/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 558 rows containing non-finite values (stat_ydensity).
## Warning: Removed 558 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sx/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 531 rows containing non-finite values (stat_ydensity).
## Warning: Removed 531 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/sx/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 604 rows containing non-finite values (stat_ydensity).
## Warning: Removed 604 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ per group: violin plus boxplot of each lag, filled by condition
ggplot(
  data = df.C.center[df.C.center$cluster == "spr", ],
  mapping = aes(x = group, y = c.center.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/spr/, c-center", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1156 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1156 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr", ],
  mapping = aes(x = group, y = rightedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/spr/, rightedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1426 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1426 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr", ],
  mapping = aes(x = group, y = leftedge.lag, fill = condition)
) +
  geom_violin() +
  geom_boxplot(width = 0.2, position = dodge) +
  labs(title = "/spr/, leftedge", y = "lag") +
  ylim(80, 350)
## Warning: Removed 1346 rows containing non-finite values (stat_ydensity).
## Warning: Removed 1346 rows containing non-finite values (stat_boxplot).

5.8.2 Per individual

/sm/

# /sm/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 63 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 50 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 135 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sm" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sm/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 108 rows containing non-finite values (stat_boxplot).

/sp/

# /sp/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 88 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 72 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sp" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sp/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 45 rows containing non-finite values (stat_boxplot).

/sx/

# /sx/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 285 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 278 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 273 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "sx" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/sx/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 253 rows containing non-finite values (stat_boxplot).

/spr/

# /spr/ per speaker: boxplots of c-center and rightedge lag, CTRL then PD
ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, c-center, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 549 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "CTRL", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, rightedge, CTRL", y = "lag") +
  ylim(80, 350)
## Warning: Removed 705 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = c.center.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, c-center, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 607 rows containing non-finite values (stat_boxplot).

ggplot(
  data = df.C.center[df.C.center$cluster == "spr" & df.C.center$group == "PD", ],
  mapping = aes(x = subj, y = rightedge.lag, fill = condition)
) +
  geom_boxplot(width = 0.5) +
  labs(title = "/spr/, rightedge, PD", y = "lag") +
  ylim(80, 350)
## Warning: Removed 721 rows containing non-finite values (stat_boxplot).

5.9 Derive standard deviations

# Relative standard deviation (RSD = 100 * sd / mean) of each lag measure,
# per speaker and cluster; the result keeps one row per speaker x cluster.
#
# df.C.center is built from all segment rows of df for the selected prompts,
# and the landmark tables are joined on (subj, recording.no), so every
# recording contributes several rows carrying the same lag values. Computing
# sd()/mean() over those repeated rows distorts the statistics, so the table
# is first reduced to one row per recording.
# NOTE(review): assumes (subj, prompt, recording.no) identifies one recording,
# as the joins in this script do -- confirm.
df.RSD <- df.C.center %>%
  group_by(subj, cluster) %>%
  drop_na(leftedge.lag, rightedge.lag, c.center.lag) %>%
  distinct(subj, cluster, prompt, recording.no, .keep_all = TRUE) %>%
  mutate(
    sd.leftedge = sd(leftedge.lag),
    mean.dur.leftedge = mean(leftedge.lag),
    RSD.leftedge = 100 * sd.leftedge / mean.dur.leftedge,
    sd.rightedge = sd(rightedge.lag),
    mean.dur.rightedge = mean(rightedge.lag),
    RSD.rightedge = 100 * sd.rightedge / mean.dur.rightedge,
    sd.C.center = sd(c.center.lag),
    mean.dur.C.center = mean(c.center.lag),
    RSD.C.center = 100 * sd.C.center / mean.dur.C.center
  ) %>%
  # the statistics are constant within a group: keep its first row only
  distinct(subj, RSD.leftedge, RSD.rightedge, RSD.C.center, .keep_all = TRUE) %>%
  ungroup()


# RSD per speaker, one boxplot panel per lag measure, filled by group
ggplot(data = df.RSD, mapping = aes(x = subj, y = RSD.leftedge, fill = group)) +
  geom_boxplot()

ggplot(data = df.RSD, mapping = aes(x = subj, y = RSD.rightedge, fill = group)) +
  geom_boxplot()

ggplot(data = df.RSD, mapping = aes(x = subj, y = RSD.C.center, fill = group)) +
  geom_boxplot()

# Transform wide format to long format: one row per speaker, cluster and RSD
# measure, with the measure name in `condition` and its value in `RSD`.
# df.RSD still carries the `condition` column inherited from df.C.center, so
# only the identifier and RSD columns are kept before reshaping; otherwise the
# new `condition` key column would clash with the existing one.
# pivot_longer() replaces the superseded gather().
df.RSD.long <- df.RSD %>%
  ungroup() %>%
  select(subj, group, cluster, RSD.leftedge, RSD.rightedge, RSD.C.center) %>%
  pivot_longer(
    cols = c(RSD.leftedge, RSD.rightedge, RSD.C.center),
    names_to = "condition",
    values_to = "RSD"
  ) %>%
  arrange(subj, group, cluster, condition, RSD)

# drop unused speaker levels and store the measure name as a factor
df.RSD.long$subj <- droplevels(df.RSD.long$subj)
df.RSD.long$condition <- as.factor(df.RSD.long$condition)

#ezANOVA(RSD.long, dv=value, wid=subj, within=condition, between=group)

# Print mean and sd of each RSD measure per group and cluster; the three mean
# columns are stacked into condition/value, the sd columns stay wide.
df.RSD %>%
  group_by(group, cluster) %>%
  summarize(
    mean.leftedge = mean(RSD.leftedge),
    sd.leftedge = sd(RSD.leftedge),
    mean.rightedge = mean(RSD.rightedge),
    sd.rightedge = sd(RSD.rightedge),
    mean.C.center = mean(RSD.C.center),
    sd.C.center = sd(RSD.C.center)
  ) %>%
  gather(condition, value, mean.leftedge, mean.rightedge, mean.C.center)
## `summarise()` regrouping output by 'group' (override with `.groups` argument)

6 Save data

# Add speaker metadata to df.CC.lag and write both modelling tables to disk.
# NOTE(review): the working directory is a hard-coded absolute path; the
# metadata file is read from it and both .rds files are written into it.
setwd("/Users/45598770/Documents/analysis/data")

df_meta <- read.csv(file = "metadata.csv", header = TRUE, sep = ",")
df_meta$gender <- as.factor(df_meta$gender)
df.CC.lag <- merge(x = df.CC.lag, y = df_meta, by = "subj")
rm(list = "df_meta")

# Reference levels for the factors (chosen as the levels with most datapoints,
# per the original note)
df.CC.lag$gender <- relevel(x = df.CC.lag$gender, ref = "M")
df.CC.lag$group <- relevel(x = df.CC.lag$group, ref = "CTRL")
df.CC.lag$dialect <- relevel(x = df.CC.lag$dialect, ref = "Rest")

saveRDS(object = df.CC.lag, file = "modelling_data_CC.rds")
saveRDS(object = df.RSD.long, file = "modelling_data_RSD.rds")